##################################################
## Author: Stephanie L. DeMora & Benjamin Newman
## Project: Adolescent Exposure to Economic Inequality and Belief in the "American dream" in Entering Adulthood
## Purpose: Replication Code for Appendix A
## Date: 2025-11-20
##################################################

pkgs <- c("tidyverse", "haven", "janitor", "interflex", "lmerTest", "DT", "here", "jtools",
          "ggeffects", "ggpubr", "clubSandwich", "stargazer", "sandwich", "ggplot2", "zipcodeR", "effectsize")
lapply(pkgs, library, character.only = TRUE)

# Load the replication inputs.
# Appendix_A_Data.rds is a list; the four IPEDS extracts, the CCES cumulative
# file and the GSS file are pulled out of it by name.
appendix_inputs <- readRDS("Appendix_A_Data.rds")

ipeds_2005 <- appendix_inputs$ipeds_2005
ipeds_2006 <- appendix_inputs$ipeds_2006
ipeds_2007 <- appendix_inputs$ipeds_2007
ipeds_2008 <- appendix_inputs$ipeds_2008
CCES       <- appendix_inputs$CCES_Cumulative
GSS        <- appendix_inputs$GSS

# The data frame `df` is stored in two files; stack them row-wise.
df <- rbind(readRDS("Data_Pt1.rds"), readRDS("Data_Pt2.rds"))

# Drop the container list and release the memory it held.
rm(appendix_inputs)
gc()

# ---------------------------------------------------------------------------------------------------------------
# Table S1. Yearly Percentage Comparison of Known Races
# ---------------------------------------------------------------------------------------------------------------

# IPEDS: percentage of enrollment in each racial/ethnic group, 2005-2008.
#
# For each IPEDS file, a group's share is its count summed over all rows,
# divided by the combined count of the five groups, times 100. Missing cells
# are skipped in both the numerator and the denominator, so a single NA in a
# race column cannot turn every percentage for that year into NA.

# Count column for each group, in output-column order. The 2005-2007 files
# use the EFRACE* column names; the 2008 file uses the DVEF* names.
race_cols_0507 <- c(
  American_Indian_or_Alaska_Native    = "EFRACE19",
  Asian_American_and_Pacific_Islander = "EFRACE20",
  Black                               = "EFRACE18",
  Hispanic                            = "EFRACE21",
  White                               = "EFRACE22"
)
race_cols_08 <- c(
  American_Indian_or_Alaska_Native    = "DVEFAIT",
  Asian_American_and_Pacific_Islander = "DVEFAPT",
  Black                               = "DVEFBKT",
  Hispanic                            = "DVEFHST",
  White                               = "DVEFWHT"
)

# Percentage of the combined count contributed by each column named in `cols`.
#   data: a data frame containing the columns listed in `cols`.
#   cols: named character vector of column names; the names label the result.
# Returns a named numeric vector of percentages (sums to 100).
ipeds_race_shares <- function(data, cols) {
  counts <- vapply(
    cols,
    # as.numeric() keeps large integer columns from overflowing in sum().
    function(col) sum(as.numeric(data[[col]]), na.rm = TRUE),
    numeric(1)
  )
  counts / sum(counts) * 100
}

# One row of shares per year; column names come from the named vectors.
race_shares <- rbind(
  ipeds_race_shares(ipeds_2005, race_cols_0507),
  ipeds_race_shares(ipeds_2006, race_cols_0507),
  ipeds_race_shares(ipeds_2007, race_cols_0507),
  ipeds_race_shares(ipeds_2008, race_cols_08)
)

racial_composition <- as_tibble(race_shares) %>%
  mutate(Year = c(2005, 2006, 2007, 2008), .before = 1)

# Remove the temporaries so only `racial_composition` is left behind.
rm(race_cols_0507, race_cols_08, ipeds_race_shares, race_shares); gc()

# TFS: percentage of respondents in each racial/ethnic group, by year.

# Labels for the RACEGROUP codes; used to name the output columns.
race_labels <- c("1" = "American Indian", "2" = "Asian", "3" = "Black",
                 "4" = "Hispanic", "5" = "White", "6" = "Other",
                 "7" = "Two or more race/ethnicity")

# Respondent counts per year and group. Codes 6 and 7 are excluded; the
# comparison also removes rows where RACEGROUP is missing.
tfs_race_counts <- df %>%
  filter(RACEGROUP != 6 & RACEGROUP != 7) %>%
  count(YEAR, RACEGROUP, name = "Total_Pop")

# Printed for reference only: number of rows per year behind the percentages.
df %>%
  dplyr::filter(RACEGROUP != 6 & RACEGROUP != 7 & !is.na(RACEGROUP)) %>%
  dplyr::filter(YEAR == 2005 | YEAR == 2006 | YEAR == 2007 | YEAR == 2008) %>%
  tabyl(YEAR)

# Within-year percentages, then one row per year and one column per group.
# A group with no rows in a given year is filled with 0.
reshaped_table <- tfs_race_counts %>%
  filter(YEAR == 2005 | YEAR == 2006 | YEAR == 2007 | YEAR == 2008) %>%
  group_by(YEAR) %>%
  mutate(Percentage = Total_Pop / sum(Total_Pop) * 100) %>%
  ungroup() %>%
  mutate(
    Year = readr::parse_number(as.character(YEAR)),
    RACEGROUP = race_labels[RACEGROUP]
  ) %>%
  select(Year, RACEGROUP, Percentage) %>%
  filter(!is.na(Percentage)) %>%
  pivot_wider(
    names_from = RACEGROUP,
    values_from = Percentage,
    values_fill = list(Percentage = 0)
  ) %>%
  arrange(Year)

rm(tfs_race_counts, race_labels); gc()

# ---------------------------------------------------------------------------------------------------------------

# Tag each table with its source and give the IPEDS columns the same names
# as the TFS columns so the two can be stacked.
racial_composition <- racial_composition %>%
  mutate(Source = "IPEDS") %>%
  rename(`American Indian` = American_Indian_or_Alaska_Native,
         Asian = Asian_American_and_Pacific_Islander)

reshaped_table <- reshaped_table %>%
  mutate(Source = "TFS")

# Stack IPEDS and TFS, add an empty `Other` column and fix the column order.
TOTAL_RACE <- bind_rows(racial_composition, reshaped_table) %>%
  mutate(Other = NA) %>%
  select(Year, Source, `American Indian`, Asian, Black, Hispanic, White, Other) %>%
  arrange(Year)
# ---------------------------------------------------------------------------------------------------------------

# CCES: row percentages of race by year, after dropping missing race and
# race code 7.
# adorn_pct_formatting() returns the percentages as text, so they are parsed
# back to numbers at the end.
# NOTE(review): no year filter is applied here, whereas the ideology table
# further down restricts CCES to 2005-2008 — confirm every CCES year is wanted.
cces_race_cols <- c("American Indian", "Asian", "Black", "Hispanic", "White",
                    "Middle Eastern", "Mixed")

TOTAL_RACE2 <- CCES %>%
  filter(!is.na(race), race != 7) %>%
  mutate(race = as_factor(race)) %>%
  tabyl(year, race) %>%
  adorn_percentages("row") %>%
  adorn_pct_formatting(affix_sign = FALSE) %>%
  select(year, `Native American`, Asian, Black, Hispanic, White,
         `Middle Eastern`, Mixed) %>%
  rename(Year = year,
         `American Indian` = `Native American`) %>%
  mutate(
    Source = "CCES",
    Other = NA,
    across(all_of(cces_race_cols), parse_number)
  )

rm(cces_race_cols)

# Append the CCES rows to the IPEDS/TFS rows; `Other` is all-NA so far and is
# made numeric so later numeric `Other` values can be bound to it.
TOTAL_RACE3 <- bind_rows(TOTAL_RACE, TOTAL_RACE2) %>%
  mutate(Other = as.numeric(Other)) %>%
  select(Year, Source,
         `American Indian`, Asian, Black, Hispanic, White,
         `Middle Eastern`, Mixed, Other) %>%
  arrange(Year, Source)

# ---------------------------------------------------------------------------------------------------------------

# GSS: row percentages of RACE by year, after dropping missing RACE.
# Groups that have no GSS column here are added as NA so the columns line up
# with the other sources.
# NOTE(review): the factor level `IAP` is relabelled "American Indian" below.
# In GSS codebooks IAP usually denotes "inapplicable" — confirm this mapping.
TOTAL_RACE4 <- GSS %>%
  filter(!is.na(RACE)) %>%
  mutate(RACEr = as_factor(RACE)) %>%
  tabyl(YEAR, RACEr) %>%
  adorn_percentages("row") %>%
  adorn_pct_formatting(affix_sign = FALSE) %>%
  mutate(
    # The formatted percentages are text; turn them back into numbers.
    across(c(IAP, WHITE, BLACK, OTHER), parse_number),
    Source           = "GSS",
    Asian            = NA_real_,
    Hispanic         = NA_real_,
    `Middle Eastern` = NA_real_,
    Mixed            = NA_real_
  ) %>%
  select(Year = YEAR, Source,
         `American Indian` = IAP, Asian, Black = BLACK, Hispanic,
         White = WHITE, `Middle Eastern`, Mixed, Other = OTHER)

# Final Table S1: all four sources stacked and ordered by year, then source.
Table_S1 <- arrange(bind_rows(TOTAL_RACE3, TOTAL_RACE4), Year, Source)

Table_S1

rm(TOTAL_RACE, TOTAL_RACE2, TOTAL_RACE3, TOTAL_RACE4,
   reshaped_table, racial_composition)
gc()

# ---------------------------------------------------------------------------------------------------------------
# Table S2. Yearly Percentage Comparison of Known Gender
# ---------------------------------------------------------------------------------------------------------------

# IPEDS: percentage of enrollment that is men / women, 2005-2008.
#
# For each IPEDS file, a share is the column's count summed over all rows,
# divided by the combined men + women count, times 100. Missing cells are
# skipped in both the numerator and the denominator, so a single NA cannot
# turn both percentages for that year into NA.

# Count column for men and women. The 2005-2007 files use EFRACE15/EFRACE16;
# the 2008 file uses EFTOTLM/EFTOTLW.
gender_cols_0507 <- c(Men = "EFRACE15", Women = "EFRACE16")
gender_cols_08   <- c(Men = "EFTOTLM",  Women = "EFTOTLW")

# Percentage of the combined count contributed by each column named in `cols`.
#   data: a data frame containing the columns listed in `cols`.
#   cols: named character vector of column names; the names label the result.
# Returns a named numeric vector of percentages (sums to 100).
ipeds_gender_shares <- function(data, cols) {
  counts <- vapply(
    cols,
    # as.numeric() keeps large integer columns from overflowing in sum().
    function(col) sum(as.numeric(data[[col]]), na.rm = TRUE),
    numeric(1)
  )
  counts / sum(counts) * 100
}

# One row of shares per year; column names come from the named vectors.
gender_shares <- rbind(
  ipeds_gender_shares(ipeds_2005, gender_cols_0507),
  ipeds_gender_shares(ipeds_2006, gender_cols_0507),
  ipeds_gender_shares(ipeds_2007, gender_cols_0507),
  ipeds_gender_shares(ipeds_2008, gender_cols_08)
)

gender_composition <- as_tibble(gender_shares) %>%
  mutate(Year = c(2005, 2006, 2007, 2008), .before = 1)

# Remove the temporaries so only `gender_composition` is left behind.
rm(gender_cols_0507, gender_cols_08, ipeds_gender_shares, gender_shares); gc()

# TFS

# Keep only rows whose Age code is strictly between 2 and 5. This overwrites
# `df`, so everything from here on runs on the restricted file.
# NOTE(review): the TFS rows of the racial-composition table earlier in this
# script are computed before this filter is applied — confirm that using a
# different sample there is intended.
df <- df %>%
  filter(Age > 2 & Age < 5)

# Printed for reference only: rows per year with a non-missing SEX.
df %>%
  dplyr::filter(!is.na(SEX)) %>%
  dplyr::filter(YEAR == 2005 | YEAR == 2006 | YEAR == 2007 | YEAR == 2008) %>%
  tabyl(YEAR)

# Labels for the SEX codes; used to name the output columns.
gender_labels <- c("1" = "Men", "2" = "Women")

# Within-year percentages of men and women.
# Rows with a missing SEX are removed BEFORE the shares are computed, so the
# denominator matches the N printed above and the shares describe respondents
# of known gender only (as the CCES and GSS rows of this table do).
reshaped_table <- df %>%
  filter(!is.na(SEX)) %>%
  filter(YEAR == 2005 | YEAR == 2006 | YEAR == 2007 | YEAR == 2008) %>%
  count(YEAR, SEX, name = "Total_Pop") %>%
  group_by(YEAR) %>%
  mutate(Percentage = Total_Pop / sum(Total_Pop) * 100) %>%
  ungroup() %>%
  mutate(
    Year = readr::parse_number(as.character(YEAR)),
    SEX = gender_labels[SEX]
  ) %>%
  # Codes without a label in `gender_labels` come out as NA and are dropped.
  filter(!is.na(SEX)) %>%
  select(Year, SEX, Percentage) %>%
  pivot_wider(
    names_from = SEX,
    values_from = Percentage,
    values_fill = list(Percentage = 0)
  ) %>%
  arrange(Year)

rm(gender_labels); gc()

# Tag the IPEDS and TFS gender tables with their source, stack them, and put
# the columns in the order used by the final table.
gender_composition <- mutate(gender_composition, Source = "IPEDS")
reshaped_table <- mutate(reshaped_table, Source = "TFS")

GENDER <- bind_rows(gender_composition, reshaped_table) %>%
  select(Year, Source, Men, Women)

# CCES: row percentages of sex by year, after dropping missing sex.
#
# Unused factor levels are dropped on the `sex` column itself so that tabyl()
# only produces columns for categories that occur. Piping the data frame into
# droplevels(CCES$sex) would instead pass the sex vector as the `except`
# argument of droplevels.data.frame(), i.e. as column indices to skip.
GENDER2 <- CCES %>%
  filter(!is.na(sex)) %>%
  mutate(sex = droplevels(as_factor(sex))) %>%
  tabyl(year, sex) %>%
  adorn_percentages("row") %>%
  adorn_pct_formatting(affix_sign = FALSE) %>%
  mutate(Source = "CCES") %>%
  rename(
    Year  = year,
    Men   = Male,
    Women = Female
  ) %>%
  # adorn_pct_formatting() returns text; turn it back into numbers.
  mutate(
    Men   = parse_number(Men),
    Women = parse_number(Women)
  )

# Append the CCES rows to the IPEDS/TFS rows.
GENDER3 <- bind_rows(GENDER, GENDER2) %>%
  arrange(Year, Source)

# GSS: row percentages of GENDER1 by year, after dropping missing GENDER1.
GENDER4 <- GSS %>%
  filter(!is.na(GENDER1)) %>%
  mutate(GENDER1 = as_factor(GENDER1)) %>%
  tabyl(YEAR, GENDER1) %>%
  adorn_percentages("row") %>%
  adorn_pct_formatting(affix_sign = FALSE) %>%
  mutate(
    # The formatted percentages are text; turn them back into numbers.
    across(c(MALE, FEMALE), parse_number),
    Source = "GSS"
  ) %>%
  select(Year = YEAR, Source, Men = MALE, Women = FEMALE)

# Final Table S2: all four sources stacked and ordered by year, then source.
Table_S2 <- arrange(bind_rows(GENDER3, GENDER4), Year, Source)

Table_S2

# The IPEDS files and the gender intermediates are not needed past this point.
rm(ipeds_2005, ipeds_2006, ipeds_2007, ipeds_2008,
   reshaped_table, gender_composition, GENDER, GENDER2, GENDER3, GENDER4)
gc()

# ---------------------------------------------------------------------------------------------------------------
# Table S3. Yearly Percentage Comparison of Ideology
# ---------------------------------------------------------------------------------------------------------------

# CCES, 2005-2008: row percentages of ideology by year.
# The five-point ideo5 scale is collapsed so that the "Very ..." categories
# join their plain counterparts; any other level (e.g. `Not Sure`) is kept.
IDEOL <- CCES %>%
  filter(
    year == 2005 | year == 2006 | year == 2007 | year == 2008,
    !is.na(ideo5)
  ) %>%
  mutate(
    ideo5 = fct_collapse(
      as_factor(ideo5),
      Liberal = c("Liberal", "Very Liberal"),
      Conservative = c("Conservative", "Very Conservative"),
      Moderate = "Moderate"
    )
  ) %>%
  tabyl(year, ideo5) %>%
  adorn_percentages("row") %>%
  adorn_pct_formatting(affix_sign = FALSE) %>%
  rename(Year = year) %>%
  mutate(
    Source = "CCES",
    # The formatted percentages are text; turn them back into numbers.
    dplyr::across(c(Liberal, Moderate, Conservative, `Not Sure`),
                  readr::parse_number)
  )

# GSS: row percentages of ideology by year, after dropping missing POLVIEWS.
# The POLVIEWS levels are collapsed into Liberal / Moderate / Conservative;
# `Not Sure` is added as NA so the columns match the CCES table.
IDEOL2 <- GSS %>%
  filter(!is.na(POLVIEWS)) %>%
  mutate(
    ideo5 = fct_collapse(
      as_factor(POLVIEWS),
      Liberal = c("LIBERAL", "EXTREMELY LIBERAL", "SLIGHTLY LIBERAL"),
      Conservative = c("CONSERVATIVE", "EXTRMLY CONSERVATIVE", "SLGHTLY CONSERVATIVE"),
      Moderate = "MODERATE"
    )
  ) %>%
  tabyl(YEAR, ideo5) %>%
  adorn_percentages("row") %>%
  adorn_pct_formatting(affix_sign = FALSE) %>%
  rename(Year = YEAR) %>%
  mutate(
    Source = "GSS",
    # The formatted percentages are text; turn them back into numbers.
    dplyr::across(c(Liberal, Moderate, Conservative), readr::parse_number),
    `Not Sure` = NA_real_
  ) %>%
  dplyr::select(Year, Source, Liberal, Moderate, Conservative, `Not Sure`)

# TFS, 2005-2008: percentage Liberal / Moderate / Conservative by year.
#
# Rows with a missing POLIVIEW are removed before tabulating, so the shares
# are taken over respondents who answered the item — the same base the CCES
# and GSS rows of this table use (both filter out missing values first).
# POLIVIEW levels are collapsed: "Far left" joins Liberal, "Far right" joins
# Conservative, and "Middle of the road" becomes Moderate.
IDEOL3 <- df %>%
  filter(YEAR == 2005 | YEAR == 2006 | YEAR == 2007 | YEAR == 2008) %>%
  filter(!is.na(POLIVIEW)) %>%
  mutate(POLVIEWS = as_factor(POLIVIEW)) %>%
  mutate(POLVIEWS = fct_collapse(
    POLVIEWS,
    Liberal = c("Liberal", "Far left"),
    Conservative = c("Conservative", "Far right"),
    Moderate = c("Middle of the road"))) %>%
  # Ideology in rows, years in columns; percentages are within each year.
  tabyl(POLVIEWS, YEAR) %>%
  adorn_percentages("col") %>%
  adorn_pct_formatting(affix_sign = FALSE) %>%
  # Back to one row per (ideology, year); the formatted values are text.
  pivot_longer(
    cols = -POLVIEWS,
    names_to  = "Year",
    values_to = "perc") %>%
  mutate(Year = as.numeric(Year),
         perc = parse_number(perc)) %>%
  filter(POLVIEWS %in% c("Liberal", "Moderate", "Conservative")) %>%
  select(Year, POLVIEWS, perc) %>%
  # One row per year, one column per ideology group.
  pivot_wider(names_from  = POLVIEWS,
              values_from = perc) %>%
  mutate(Source = "TFS", .before = 1) %>%
  # `Not Sure` is filled with NA for TFS so the columns match the CCES table.
  dplyr::mutate(`Not Sure` = NA_real_) %>%
  dplyr::select(Year, Source, Liberal, Moderate, Conservative, `Not Sure`)


# Final Table S3: all three sources stacked and ordered by year, then source.
Table_S3 <- bind_rows(IDEOL, IDEOL2, IDEOL3) %>%
  arrange(Year, Source)

Table_S3

rm(IDEOL, IDEOL2, IDEOL3, df, GSS, CCES); gc()

# Print the three appendix tables together.
Table_S1; Table_S2; Table_S3
